{
    "cells": [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## Installation"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "### Clone the repository"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {
                "vscode": {
                    "languageId": "plaintext"
                }
            },
            "outputs": [],
            "source": [
                "!git clone https://github.com/metavoiceio/metavoice-src.git\n",
                "%cd metavoice-src"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "### Install dependencies"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {
                "vscode": {
                    "languageId": "plaintext"
                }
            },
            "outputs": [],
            "source": [
                "!sudo apt install pipx\n",
                "!pipx install poetry\n",
                "!pipx run poetry install && pipx run poetry run pip install torch==2.2.1 torchaudio==2.2.1\n",
                "!pipx run poetry env list | sed 's/ (Activated)//' > poetry_env.txt\n",
                "# NOTE: pip's dependency resolver will error & complain, ignore it!\n",
                "# its due to a temporary dependency issue, `tts.synthesise` will still work as intended!"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {
                "vscode": {
                    "languageId": "plaintext"
                }
            },
            "outputs": [],
            "source": [
                "import sys, pathlib\n",
                "venv = pathlib.Path(\"poetry_env.txt\").read_text().strip(\"\\n\")\n",
                "sys.path.append(f\"/root/.cache/pypoetry/virtualenvs/{venv}/lib/python3.10/site-packages\")"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## Inference"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {
                "vscode": {
                    "languageId": "plaintext"
                }
            },
            "outputs": [],
            "source": [
                "from IPython.display import Audio, display\n",
                "from fam.llm.fast_inference import TTS\n",
                "\n",
                "tts = TTS()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {
                "vscode": {
                    "languageId": "plaintext"
                }
            },
            "outputs": [],
            "source": [
                "wav_file = tts.synthesise(\n",
                "  text=\"This is a demo of text to speech by MetaVoice-1B, an open-source foundational audio model.\",\n",
                "  spk_ref_path=\"assets/bria.mp3\" # you can use any speaker reference file (WAV, OGG, MP3, FLAC, etc.)\n",
                ")\n",
                "display(Audio(wav_file, autoplay=True))"
            ]
        }
    ],
    "metadata": {
        "accelerator": "GPU",
        "colab": {
            "gpuType": "T4",
            "provenance": []
        },
        "kernelspec": {
            "display_name": "Python 3",
            "name": "python3"
        },
        "language_info": {
            "name": "python"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 2
}
